package com.dog.lucene.first;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.dog.domain.Crawl;


public class IndexSearch {

	/** Filesystem path of the Lucene index directory. */
	private static final String INDEX_PATH = "C:\\test1\\aaa\\";

	/** Name of the stored field that is searched and highlighted. */
	private static final String FIELD = "description";

	/** Maximum number of top hits fetched per search. */
	private static final int MAX_HITS = 20;

	/**
	 * Executes the given query against the index at {@link #INDEX_PATH} and
	 * returns the matching documents as {@link Crawl} objects. Query terms
	 * occurring in the "description" field are wrapped in a red
	 * {@code <span>} tag for HTML highlighting.
	 *
	 * <p>On an I/O failure the error is printed and the results collected so
	 * far (possibly an empty list) are returned, matching the original
	 * best-effort behavior.
	 *
	 * @param query the Lucene query to execute
	 * @return a freshly allocated list of matching results, never {@code null}
	 * @throws InvalidTokenOffsetsException if the highlighter encounters token
	 *         offsets inconsistent with the stored field text
	 */
	public List<Crawl> doSearch(Query query) throws InvalidTokenOffsetsException {
		// Local list instead of an instance field: the original accumulated
		// results across calls, so a second search returned stale hits too.
		List<Crawl> results = new ArrayList<Crawl>();
		File indexFile = new File(INDEX_PATH);
		// try-with-resources closes both Directory and IndexReader even when
		// an exception is thrown (the original leaked them on error paths).
		try (Directory directory = FSDirectory.open(indexFile);
				IndexReader reader = DirectoryReader.open(directory)) {
			IndexSearcher searcher = new IndexSearcher(reader);
			TopDocs topDocs = searcher.search(query, MAX_HITS);

			QueryScorer scorer = new QueryScorer(query, FIELD);
			// Custom highlight markup: matched terms rendered in red.
			SimpleHTMLFormatter formatter =
					new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
			Highlighter highlighter = new Highlighter(formatter, scorer);
			// SimpleSpanFragmenter sizes fragments around the matched spans;
			// the original set it and then immediately overwrote it with a
			// plain SimpleFragmenter, making the first call dead code.
			highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

			for (ScoreDoc sd : topDocs.scoreDocs) {
				Document doc = searcher.doc(sd.doc);
				String description = doc.get(FIELD);

				// Re-tokenize the stored field text so the highlighter can
				// locate the query terms within it.
				TokenStream tokens = TokenSources.getAnyTokenStream(
						searcher.getIndexReader(), sd.doc, FIELD, new IKAnalyzer(true));
				String highlighted = highlighter.getBestFragment(tokens, description);
				// getBestFragment returns null when no query term occurs in
				// the field; fall back to the raw text so the result is
				// never a null description.
				if (highlighted == null) {
					highlighted = description;
				}

				Crawl crawl = new Crawl();
				crawl.setDescription(highlighted);
				crawl.setAbs(doc.get("abs"));
				crawl.setUrl(doc.get("url"));
				results.add(crawl);
			}
		} catch (IOException e) {
			// NOTE(review): I/O failure is swallowed and a partial/empty list
			// is returned — preserved from the original; consider rethrowing
			// or logging via a proper logger instead of printStackTrace.
			e.printStackTrace();
		}
		return results;
	}
}
